
* STEP 1 . Prepare replication data from Gollin and Udry (2021)
*---------------------------------------------------------
 

 
******* Tanzania*********

* Define file paths
global do = "${mystart}/Heterogeneity and Misallocation/do"
global allwave = "${mystart}/Heterogeneity and Misallocation/Data/LSMS Tanzania"

global wave2008 = "${mystart}/Heterogeneity and Misallocation/Data/LSMS Tanzania/TZA_2008_LSMS"
global wave2010 = "${mystart}/Heterogeneity and Misallocation/Data/LSMS Tanzania/TZA_2010_LSMS"
global wave2012 = "${mystart}/Heterogeneity and Misallocation/Data/LSMS Tanzania/TZA_2012_LSMS_v01_M_STATA_English_labels"
global megan = "${mystart}/Heterogeneity and Misallocation/Data/LSMS Tanzania/tanzania files from Megan"
global data = "${mystart}/Heterogeneity and Misallocation/Data/TzWorking"
global temp = "${mystart}/Heterogeneity and Misallocation/Data/TzWorking/Temp"
global results = "${mystart}/Heterogeneity and Misallocation/Results/Tanzania"
global ccresults = "${mystart}/Heterogeneity and Misallocation/Results/crosscountry"
global raindir = "${mystart}/Heterogeneity and Misallocation/Data"

 
 *A. Obtain GU's microdata for Tanzania
 *--------------
  
  
   do "$do/TZ_data_prep.do"

 
 ***EXTRACT frpm TZ_prod_fnct.do
 
global do = "${mystart}/Heterogeneity and Misallocation/do"
global wave2008 = "${mystart}/Heterogeneity and Misallocation/Data/LSMS Tanzania/TZA_2008_LSMS"
global wave2010 = "${mystart}/Heterogeneity and Misallocation/Data/LSMS Tanzania/TZA_2010_LSMS"
global wave2012 = "${mystart}/Heterogeneity and Misallocation/Data/LSMS Tanzania/TZA_2012_LSMS_v01_M_STATA_English_labels"
global megan = "${mystart}/Heterogeneity and Misallocation/Data/LSMS Tanzania/tanzania files from Megan"
global data = "${mystart}/Heterogeneity and Misallocation/Data/TzWorking"
global temp = "${mystart}/Heterogeneity and Misallocation/Data/TzWorking/Temp"
global results = "${mystart}/Heterogeneity and Misallocation/Results/Tanzania"
global ccresults = "${mystart}/Heterogeneity and Misallocation/Results/crosscountry"




*
* Section 0 : Setup
*
{
version 14
version 16

*Check if filepaths have been established using Tanzania_initialise.do

qui if "$data"=="" {
	display as error "Please run Tanzania_initialise.do first."
	error 1
}

global lqvars  "lnsale_value lnsale_valuem dist_home dist_road _LSQ* _LST*"   
*global lqvars  "dist_home dist_road _LSQ* _LST*"   
*global lqvars  "lnsale_value lnsale_valuem dist_home _LSQ* _LST*"   
global xqvars "single_manager health healthm _XLI* Xgender av_age av_agem "


global qvars ="_YC*"

*Plot specific shocks

global plotshock " crops_lost less_h _STd* _STc* _STW* _SQd* _SQc* _SQW* "
global plotshock " crops_lost _SQc* _SQa*"        
global plotshock " crops_lost less_h _SQl* _SQd* _SQc* _SQW*"  
*global plotshock " crops_lost less_h _SQd* _SQc* _SQa*"        
global plotshock " crops_lost  _SQd* _SQc* _SQW*" 
global plotshock " crops_lost  _SQd* _SQc* _SQW* _STd* _STc*"  //best so far 
global plotshock " crops_lost  _SQd* _SQc* _SQW*  _STW*"

*HH shocks 
global hhshocknf "livestock_death illness death_hhm burglary"
global hhshockf "drought_floods crop_disease water_shortage"  
global hhshockf "water_shortage"	//best so far
*community shocks
global commshock "evimax "
global commshock "anntot "

global instr "$hhshocknf"

global RHS "$qvars $lqvars $xqvars $plotshock $hhshockf "    

use "${data}/Tanzania_panel.2.0.dta", clear

*
*Prepare regressors
*


gen grain = CropID< 20    // maize and grains
gen cassava = CropID>=20&CropID<30  //cassava and tubers
gen legume = CropID>=30&CropID<40    //legumes
gen fruit = CropID>=70 & CropID<80   //bannana and orchard
gen otherc = grain==0&cassava==0&legume==0&fruit==0



drop if CropID>=50&CropID<60  //coffee and tea
drop if fallow==1

egen yc = group(year grain cassava legume fruit otherc)
xi i.yc, pre(_YC)

egen yearcropfarmerID = group(HHID main_manager year grain cassava legume fruit otherc)

*land quality regressors
winsor sale_value, p(.05) g(sale_valuec)
gen lnsale_value = ln(sale_valuec)
gen lnsale_valuem = lnsale_value==.
replace lnsale_value=0 if lnsale_value==.
replace slope=0 if slope==.
xi, prefix(_LSL) i.slope
replace soil_quality = 0 if soil_quality==.
xi, prefix(_LSQ) i.soil_quality
replace soil_type = 0 if soil_type==.
xi, prefix(_LST) i.soil_type

drop _LSTsoil_ty_4   ///collinear



*labor characteristics
gen healthm= health==.
replace health=0 if health==.
replace literacy1 = 9 if literacy1== .
*xi, prefix(_XLI) i.literacy1   
replace literacy = 9 if literacy==.
xi, prefix(_XLI) i.literacy  

gen Xgender = gender_manager1
replace Xgender = 1 if gender_manager1==.
gen av_agem = av_age==.
replace av_age=999 if av_age==.

foreach var of varlist $hhshocknf $hhshockf {
	replace `var'=0 if `var'==.
	}



*selected interactions of household and community shocks with Plot Characteristics
*following Uganda

*August 2018 - collinearity across these 3
gen anntotsq=anntot^2
gen wetQsq = wetQ^2
xi i.soil_quality|wetQ, pre(_SQW)
drop _SQWsoil*
xi i.soil_quality|crop_d , pre(_SQc)
drop _SQcsoil*
xi i.soil_quality|anntot , pre(_SQa)
drop _SQasoil*
*xi i.soil_quality|wetQ, pre(_SQE)
*xi i.soil_quality|wetQsq, pre(_SQF)
xi i.soil_quality|drought, pre(_SQd)
drop _SQdsoil*
drop if soil_type==0
xi i.soil_type|wetQ, pre(_STW)
drop _STWsoil*
*xi i.soil_type|wetQsq, pre(_STF)
xi i.soil_type|drought, pre(_STd)
drop _STdsoil*
xi i.soil_type|crop_d, pre(_STc)
drop _STcsoil*
xi i.soil_quality|less , pre(_SQl)
drop _SQlsoil*


*
*
**Instruments
*
*
***
gen goodhouse = wall1==5|wall1==6|floor1==2
gen securetenure= land_tenure1==1|land_tenure1==5
gen lwelfare = ln(welfare)
gen well_settled=years_residence1>15


*Shocks on other plots in hh and community

foreach var of varlist $plotshock {
		egen Z`var' = sum(`var'), by(HHID year)
		egen zZ`var' = sum(`var'), by(village year)
		replace zZ`var'=zZ`var'-`var' /*shocks in community*/
		replace Z`var' = Z`var'- `var'  /*schocks in hh*/
		sum `var'
		local drop = `r(sd)'
		if `drop'==0 {
			drop `var' Z`var' zZ`var'
			}
		
	}


drop _LSQsoil_qu_3

*
*
*Regressions
*
*



gen y = lYva
gen l = lLand
gen x =lLabour

*replace l=lLand_harv	


la var Xgender "Male Plot"
la var lnsale_value "log Plot Value" 
/*
la var Z_SQEsoiXevima_1 "EVI*Good Soil in HH"
la var Z_SQEsoiXevima_2 "EVI*Avg Soil in HH"
la var Z_SQEsoiXevima_3 "EVI*Poor Soil in HH"

la var Z_STEsoiXevima_2 "EVI*Loam in HH"
la var Z_STEsoiXevima_3 "EVI*Clay in HH"
la var Z_STEsoiXevima_4 "EVI*Other in HH"
 */
foreach var of varlist  y l x $RHS $instr Z*  {
	drop if `var'==.
	}
	
cap log close
log using "${results}/TZ_prod_fnct.log", replace
ivregress 2sls y $RHS (l x = $instr Z*  )


*difference out year-crop effects to reduce dimensionality

*reset RHS to drop the year crop effects

global RHS "$lqvars $xqvars $plotshock $hhshockf "     
 
log close

save "${data}/prebootstrap.TZ.pf", replace 

}




*******************
**** Section 1:  Production Function First Stage
*****************

// Two inputs: l=lLand, x=lLabour
// Output: y = lYva (log value added )
// Production function: y = A L^beta_l X^beta_x exp^(W*beta_w) 
// W is plot and household characteristics
{

clear
capture log close
log using "$results/first_stage_TZ.log", replace
use "${data}/prebootstrap.TZ.pf"

foreach var of varlist  y l x $RHS $instr Z*  {
		egen temp = mean(`var'), by(yc)
		replace `var' = `var' - temp
		drop temp
	}



*
*  First Stages, qreg, iqrange, and ols
*

eststo clear

	eststo: reg l $RHS $instr Z* 
	
		testparm  $instr Z* 
		estadd scalar instF =r(F)
		estadd scalar instp =r(p)
		
	eststo: reg x $RHS $instr Z* 
	
		testparm  $instr Z* 
		estadd scalar instF =r(F)
		estadd scalar instp =r(p)

	foreach q of numlist .25 .5 .75 {
		eststo: qreg l $RHS $instr Z* , q(`q') vce(robust)
		
		testparm  $instr Z* 
			estadd scalar instF =r(F)
			estadd scalar instp =r(p)
		eststo: qreg x $RHS $instr Z* , q(`q') vce(robust)
		testparm $instr Z* 
		estadd scalar instF =r(F)
		estadd scalar instp =r(p)
		}
	eststo: iqreg l $RHS $instr Z* 
		testparm $RHS $instr Z*  
		estadd scalar iqF =r(F)
		estadd scalar iqp =r(p)

	eststo: iqreg x $RHS $instr Z* 
		testparm $RHS $instr Z* 
		estadd scalar iqF =r(F)
		estadd scalar iqp =r(p)	
		
		la var Xgender "Plot of Male"
	
		la var livestock_death "Livestock death or stolen*"
		la var illness  "Illness/accident of HH member*"
		la var death_hhm "Death of HH member*"
		la var Zcrops_lost "Adverse shock to household plots*"
		
		

la var lnsale_value  "Land Value"
la var lnsale_valuem "Land Value Missing"
la var dist_home "Distance Home" 
la var dist_road "Distance to Road" 
la var _LSQsoil_qu_1 "Good Soil" 
la var _LSQsoil_qu_2 "Average Soil" 
la var _LSTsoil_ty_1 "Sandy Soil" 
la var _LSTsoil_ty_2 "Loamy Soil" 
la var _LSTsoil_ty_3 "Clay Soil" 
la var single_manager "Single Manager"
la var health  "Poor Health" 
la var healthm "Missing Health" 
la var _XLIliterac_1 "Illiterate"
la var _XLIliterac_9 "Lit Missing"  
la var Xgender "Male Manager" 
la var av_age "Manager Age"
la var av_agem "Age Missing"  
la var crops_lost "Crop Shock" 
la var Z_SQdsoiXdroug_1 "Drought/Flood*Good Soil in HH*" 
la var Z_SQdsoiXdroug_2 "Drought/Flood*Avg Soil in HH*" 
la var Z_SQdsoiXdroug_3 "Drought/Flood*Poor Soil in HH*" 
la var _SQcsoiXcrop__1 "Crop Disease*Good Soil" 
la var _SQcsoiXcrop__2 "Crop Disease*Average Soil"   
la var _SQcsoiXcrop__3 "Crop Disease*Poor Soil" 
la var Z_SQWsoiXwetQ_1  "GS Rain*Good Soil in HH*" 
la var Z_SQWsoiXwetQ_2  "GS Rain*Avg Soil in HH*" 		
la var Z_SQWsoiXwetQ_3  "GS Rain*Poor Soil in HH*" 	
 

la var _STWsoiXwetQ_2 "GS Rainfall*Loamy Soil" 
la var _STWsoiXwetQ_3 "GS Rainfall*Clay Soil" 								 
la var _STWsoiXwetQ_4 "GS Rainfall*Other Soil" 		

la var water_shortage "Water Shortage"
		
la var _SQdsoiXdroug_1 "Drought/Flood*Good Soil" 
la var _SQdsoiXdroug_2 "Drought/Flood*Avg Soil" 
la var _SQdsoiXdroug_3 "Drought/Flood*Poor Soil" 	
	
la var illness "Illness/accident of HH mem"
		
		
esttab using "$results/apndx1stTZ.prod.fnct.csv", csv replace label     ///
	order(Xgender lnsale_value _SQdsoiXdroug_1 _SQdsoiXdroug_2 _SQdsoiXdroug_3 ///
	illness Z_SQWsoiXwetQ_1 Z_SQWsoiXwetQ_2 Z_SQWsoiXwetQ_3 Z_SQdsoiXdroug_1 ///
	Z_SQdsoiXdroug_2 Z_SQdsoiXdroug_3 Zcrops_lost)  ///
	 stats(instF instp iqF iqp)   se(a2) b(a2) obslast nostar


esttab using "$results/1stTZ.prod.fnct.csv", csv replace label     ///
	keep(Xgender lnsale_value _SQdsoiXdroug_1 _SQdsoiXdroug_2 _SQdsoiXdroug_3 ///
	illness Z_SQWsoiXwetQ_1 Z_SQWsoiXwetQ_2 Z_SQWsoiXwetQ_3 Z_SQdsoiXdroug_1 ///
	Z_SQdsoiXdroug_2 Z_SQdsoiXdroug_3 Zcrops_lost)                                                                     ///
	order(Xgender lnsale_value _SQdsoiXdroug_1 _SQdsoiXdroug_2 _SQdsoiXdroug_3 ///
	illness Z_SQWsoiXwetQ_1 Z_SQWsoiXwetQ_2 Z_SQWsoiXwetQ_3 Z_SQdsoiXdroug_1 ///
	Z_SQdsoiXdroug_2 Z_SQdsoiXdroug_3 Zcrops_lost)                                                                   ///
	 stats(instF instp iqF iqp)   se(a2) b(a2) obslast nostar	 
	
	

}


cap log close


*******************
**** Section 1:  Production Function Second Stage and Variance Decomp
*****************
*
* Section 1A: Estimates using 2sls
*
{

	use "${data}/prebootstrap.TZ.pf", clear
	
	
	foreach var of varlist  y l x $RHS $instr Z*  {
		egen temp = mean(`var'), by(yc)
		replace `var' = `var' - temp
		drop temp
	}
	
	eststo clear
	eststo: ivregress 2sls y $RHS (l x = $instr Z* )
		

*
*
*Save results
*
*
	estimates save "${data}/Tz_prod_fnct_2sls.ster", replace 


	mat b2sls = e(b)
	svmat b2sls                   
	scalar nv = colsof(b2sls)     
            /*only obs=1 has data for these variables*/
		

*** Covariances


	qui d $RHS, varlist
	global RHSnames = "`r(varlist)'"
	qui d $xqvars, varlist
	global xqvarnames = "`r(varlist)'" 
	
	qui d $lqvars, varlist
	global lqvarnames = "`r(varlist)'" 


	qui d $plotshock $hhshockf, varlist
	global lshocknames = "`r(varlist)'" 
 
	mkmat b2sls* if _n==1, mat(b2sls)
	matrix colnames b2sls = l x $RHSnames const 

	global alphal= b2sls[1,1]
	global alphax= b2sls[1,2]
	global const = b2sls[1,nv]

	
	
*ONLY GROUPS THAT HAVE INFORMATION
*be sure b2sls remains
foreach var of varlist b2sls* {
	egen temp = mean(`var')
	replace `var' = temp
	drop temp
	}


qui bys yearcropfarmerID : keep if _N>1

** construct dytilde dltilde dxtilde 

	foreach var of varlist y l x $RHS {
		egen temp = mean(`var'), by(yearcropfarmerID)
		gen hh_`var' = `var' - temp
		drop temp
	}

	gen dytilde = y-hh_y
	gen dltilde = l-hh_l
	gen dxtilde = x-hh_x

*regressors assigned to Late Shocks


	foreach var of varlist $lshocknames {
		matrix thisb = b2sls[1,"`var'"]
		local beta = thisb[1,1]
		replace dytilde = dytilde - (hh_`var' * `beta' )  
	}  

*regressors assigned to Land and labour

foreach var of varlist $lqvars {
		matrix thisb = b2sls[1,"`var'"]
		local beta = thisb[1,1]
		replace dltilde = dltilde + (hh_`var' * `beta')  
	}
	

foreach var of varlist $xqvars {
		matrix thisb = b2sls[1,"`var'"]
		local beta = thisb[1,1]
		replace dxtilde = dxtilde + (hh_`var' * `beta')  
	}
	
*calculate ratios for variances needed
	gen rat_xl = dxtilde-dltilde

	gen rat_yl = dytilde-dltilde
	gen rat_yx = dytilde-dxtilde


*calculate variances and covariances needed
	local parameters "sigY sigL sigX sigeY sigeL sigeX sigLX sigYL sigYX"
	local obsvariances "rat_xl rat_yl rat_yx dytilde dltilde dxtilde" 
	matrix covariances = J(9,1,0)
	loc eq = 4

	foreach var of varlist `obsvariances' {
		sum `var'
		loc v =r(sd)^2
		matrix covariances[`eq',1]=`v'
		loc eq = `eq'+1
	}

	
	matrix rownames covariances  = def1 def2 def3 `obsvariances'

	loc multsq = 1
	loc mult2 = 2

	
	matrix def A =                        ///
0,	1,	0,	0,	0,	0,	1,	0,	0\   ///
0,	-1,	1,	0,	0,	0,	0,	0,	0\      ///
0,	0,	0,	0,	0,	0,	0,	1,	1\      ///
0,	1,	1,	0,	1,	1,	-2,	0,	0\      ///
0,	1,	0,	1,	1,	0,	0,	0,	0\      ///
0,	0,	1,	1,	0,	1,	0,	0,	0\      ///
`multsq',	0,	0,	1,	0,	0,	0,	0,	0\      ///
`multsq',	1,	0,	0,	1,	0,	0,	`mult2', 	0\      ///
`multsq',	0,	1,	0,	0,	1,	0,	0,	`mult2'


	matrix rownames A = def1 def2 def3 `obscovariances'

	mata: st_matrix("params",params=lusolve(st_matrix("A"),st_matrix("covariances")))
	matrix rownames params  = `parameters'



	matrix ESTcov2sls = params'
	svmat ESTcov2sls, names(matcol)

	save "${results}/Tzvariancedecomp.2sls.dta", replace
	keep b2sls* ESTcov*
	gen bootstrap = 0
	keep in 1
	save  "${results}/boot_results.2sls.dta", replace           /*name change just for testing replication*/

}	
	
*******************
**** Section 3 TFPA & TFPB
******************


*
* 3A : 2SLS

 
use "${results}/boot_results.2sls.dta", clear                             /*name change just for testing replication*/

mkmat b2sls* if boot==0, mat(b2sls)
local parameters "sigY sigL sigX sigeY sigeL sigeX sigLX sigYL sigYX"

mkmat ESTcov2sls* if boot==0, mat(ESTcov2sls)
matrix colnames ESTcov2sls = `parameters'
global sigeY = ESTcov2sls[1, colnumb(ESTcov2sls,"sigeY")]   /*late risk + meas error in output*/
global sigeL = ESTcov2sls[1, colnumb(ESTcov2sls,"sigeL")]  /*meas error in land*/
global sigeX= ESTcov2sls[1, colnumb(ESTcov2sls,"sigeX")]  /*meas error in labor*/


global lqvars  "lnsale_value lnsale_valuem dist_home dist_road _LSQ* _LST*"   
global xqvars "single_manager health healthm _XLI* Xgender av_age av_agem "


global qvars ="_YC*"




*Plot specific shocks

global plotshock " crops_lost  _SQd* _SQc* _SQW* _STd* _STc*" 
global plotshock " crops_lost  _SQd* _SQc* _SQW*  _STW*"

*HH shocks 

global hhshocknf "livestock_death illness death_hhm burglary"
 
global hhshockf "water_shortage"	
	
global RHS "$lqvars $xqvars $plotshock $hhshockf "     



use "${data}/prebootstrap.TZ.pf", clear
	
	
	foreach var of varlist  y l x $RHS $instr Z*  {
		egen temp = mean(`var'), by(yc)
		replace `var' = `var' - temp
		drop temp
	}


qui d $RHS, varlist
global RHSnames = "`r(varlist)'"



matrix colnames b2sls = l x $RHSnames const 

scalar nv = colsof(b2sls) 

global alphal= b2sls[1,1]
global alphax= b2sls[1,2]
global const = b2sls[1,nv]

global sig_alpha_eps = $alphal ^2 * $sigeL + $alphax ^2 * $sigeX
*
* TFPA
*

gen double lnTFPA = $const   /*naive TFP*/



foreach var of varlist l x $RHSnames {
		replace lnTFPA = lnTFPA + b2sls[1, colnumb(b2sls,"`var'")] * `var' 
	}
	

replace lnTFPA = y - lnTFPA  
	sum lnTFPA 
		

global mulnTFPA = `r(mean)'
global sig2lnTFPA = `r(sd)'^2

*
*TFP B
*


*shrinking levels of TFP


global shrinkY = sqrt((($sig2lnTFPA -$sigeY - $sig_alpha_eps ))/(($sig2lnTFPA ))) 
gen lnTFPB = (lnTFPA - $mulnTFPA)*$shrinkY + $mulnTFPA

 
*create parish name and season to make it simialr to UGANda data and run program to generate eff gains
gen parish_name=village
gen season =1

save "$FA_datasets/data_TFP_TZ.dta", replace

